4.1 10K PBMC CITE-seq: Visualize ADT and GEX Data

In [1]:
import pandas as pd
import numpy as np
from copy import deepcopy
from ast import literal_eval as make_tuple

import matplotlib.pyplot as plt
%matplotlib inline 
import warnings
warnings.filterwarnings('ignore')
In [2]:
from clustergrammer2 import net
import helper_functions as hf
>> clustergrammer2 backend version 0.5.1
In [3]:
def cell_umi_count(df):
    """Return the sum of every column of ``df``.

    In this notebook the columns are cells, so the result is the total
    count per cell over whatever rows the matrix contains.

    Parameters
    ----------
    df : pandas.DataFrame
        Matrix whose columns are summed.

    Returns
    -------
    pandas.Series
        One total per column, indexed by the column labels of ``df`` in
        their original order.
    """
    # One vectorized column-wise reduction replaces the Python-level loop
    # over columns. Unlike looking each column up by label, it also stays
    # correct when labels are duplicated (a label lookup would then return
    # a DataFrame instead of a single column).
    return df.sum(axis=0)
In [4]:
# Container for the data matrices used in this notebook, keyed by name;
# the loading cells below store DataFrames under 'adt' and 'gex'.
df = {}

Load ADT and GEX Data

In [5]:
# Read the processed ADT matrix from parquet.
df['adt'] = pd.read_parquet(
    '../data/10k_pbmc_feature_v3-chem_v3.0.0-cr/processed_data/adt-cat.parquet'
)
# Column labels are stored as strings; parse each one back into a Python
# literal with ast.literal_eval (imported here as make_tuple).
df['adt'].columns = list(map(make_tuple, df['adt'].columns))
In [6]:
# Quick shape check of the loaded ADT matrix.
# NOTE(review): presumably (features, cells) — confirm against the column labels.
df['adt'].shape
Out[6]:
(14, 7864)
In [7]:
# Read the processed GEX matrix from parquet.
df['gex'] = pd.read_parquet(
    '../data/10k_pbmc_feature_v3-chem_v3.0.0-cr/processed_data/gex-cat_100-var.parquet'
)
# Column labels are stored as strings; parse each one back into a Python
# literal with ast.literal_eval (imported here as make_tuple).
df['gex'].columns = list(map(make_tuple, df['gex'].columns))
In [8]:
# Quick shape check of the loaded GEX matrix.
# NOTE(review): presumably (genes, cells) — confirm against the column labels.
df['gex'].shape
Out[8]:
(100, 7864)

UMI Normalize GEX Data

In [9]:
%%time
ser_sum = cell_umi_count(df['gex'])
df['gex'] = df['gex'].div(ser_sum)
print(df['gex'].shape)
print(df['gex'].sum().head())
(100, 7864)
(AAACCCAAGATTGTGA, Cell Type: Myeloid CD14)           1.0
(AAACCCACATCGGTTA, Cell Type: Myeloid CD14)           1.0
(AAACCCAGTACCGCGT, Cell Type: Myeloid CD14)           1.0
(AAACCCAGTATCGAAA, Cell Type: NK Cells CD56, CD16)    1.0
(AAACCCAGTCGTCATA, Cell Type: NK Cells CD56, CD16)    1.0
dtype: float64
CPU times: user 1.1 s, sys: 47.5 ms, total: 1.15 s
Wall time: 1.12 s

Visualize ADT Data

In [10]:
# Load the ADT DataFrame into the clustergrammer2 `net` object imported above.
net.load_df(df['adt'])
# Kept as the last expression so its return value is what the cell displays
# (presumably the interactive heatmap widget — confirm in clustergrammer2 docs).
net.widget()

Visualize GEX Data

Top 100 variable genes

In [11]:
# Load the GEX DataFrame into the same `net` object used for the ADT view.
# NOTE(review): presumably this replaces the previously loaded data — confirm.
net.load_df(df['gex'])
# Z-score along the row axis, so each row is put on a comparable scale.
net.normalize(axis='row', norm_type='zscore')
# Limit values to the range [-5, 5]; presumably to keep outliers from
# dominating the color scale — confirm.
net.clip(-5,5)
# Kept as the last expression so its return value is what the cell displays.
net.widget()